Designing and evaluating an online reinforcement learning agent for physical exercise recommendations in N-of-1 trials¶
This notebook is used to evaluate the performance for the RL agent using data from the simulate_data.ipynb notebook.
Imports¶
In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
# Generic Imports
import pandas as pd
import pandas
import seaborn as sns
import matplotlib.pylab as plt
import numpy as np
import arviz as az
import arviz
import numpy
import hvplot
import holoviews
import random
import itertools
from tqdm.notebook import tqdm
from bokeh.io import show
from bokeh.layouts import column
import jsonpickle
In [3]:
# Simulation Library Imports
from nof1rl4endo import *
Load from disk¶
In [4]:
## Load jsonpickle from disk:
with open("../data/2023-09-20-series.json", "r") as file:
calculated_series, config_to_simulation_data = jsonpickle.decode(file.read())
Analysis¶
Allocations¶
In [5]:
titles = [
f"{entry['result'].simulations[0].model}, {entry['result'].simulations[0].policy}"
for entry in calculated_series
]
def plot_allocations(calculated_series, titles, treatment_name):
panels = [
entry["result"].plot_allocations(treatment_name) for entry in calculated_series
]
for title, panel in zip(titles, panels):
panel.opts(title=title)
return holoviews.Layout(panels).cols(1)
plot_allocations(calculated_series, titles, treatment_name="selection_index")
Out[5]:
Custom Metric Definitions and Processing¶
In [6]:
# Filter only the observations where Thompson Sampling was used
ab_randomization_str = "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n', 'SelectionPolicyThompsonSampling(PhysicalExerciseModel)'])"
ba_randomization_str = "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyThompsonSampling(PhysicalExerciseModel)', 'SelectionPolicyFixedPolicy\\n'])"
CUT_TO_ADAPTIVE_INTERVENTIONS = True
if CUT_TO_ADAPTIVE_INTERVENTIONS:
ab_randomization_start = 21
ba_randomization_start = 7
for entry in calculated_series:
start = None
if entry["configuration"]["policy"] == ab_randomization_str:
start = ab_randomization_start
if entry["configuration"]["policy"] == ba_randomization_str:
start = ba_randomization_start
if start is not None:
result = entry["result"]
simulations = result.simulations
for simulation in simulations:
simulation.history = simulation.history[start : start + 14]
series = [entry["result"] for entry in calculated_series]
In [7]:
def switch_to_fixed_policy(config):
config = config.copy()
config[
"policy"
] = "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n'])"
return config
metrics = [
Entropy(outcome_name="type"),
StandardDeviation(treatment_name="intensity"),
StandardDeviation(treatment_name="duration"),
RegretAgainstOtherConfiguration(
outcome_name="pain_reduction",
name="FixedPolicy",
configuration_transform_function=switch_to_fixed_policy,
config_to_simulation_data=config_to_simulation_data,
),
ProbabilityIntervalMin(
policy_filter_names=["ab_randomization_str", "ba_randomization_str"]
),
ProbabilityIntervalMax(
policy_filter_names=["ab_randomization_str", "ba_randomization_str"]
),
]
Plot sum of rewards (debug output)¶
In [8]:
SeriesOfSimulationsData.plot_lines(
series, [SimpleRegret(outcome_name="pain_reduction")], legend_position=(0, 1.75)
)
Out[8]:
<Axes: xlabel='t', ylabel='Regret'>
Paper output generation¶
Table¶
In [9]:
name_mapping = {
"ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n'])": "Fixed",
"ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n', 'SelectionPolicyThompsonSampling(PhysicalExerciseModel)'])": "AB Randomization",
"ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyThompsonSampling(PhysicalExerciseModel)', 'SelectionPolicyFixedPolicy\\n'])": "BA Randomization",
}
df = SeriesOfSimulationsData.score_data(
series, metrics, {"policy": lambda x: name_mapping[x]}
)
filtered_df = df.loc[
(
(df["policy"] == "AB Randomization") & (df["t"] == 34)
| (df["policy"] == "BA Randomization") & (df["t"] == 20)
| (df["policy"] == "Fixed") & (df["t"] == 34)
)
]
filtered_df
Out[9]:
| t | score | simulation | patient_id | model | policy | metric | |
|---|---|---|---|---|---|---|---|
| 34 | 34 | 1.290845 | SimulationData | 0 | Scenario I | Fixed | Entropy (type) |
| 34 | 34 | 1.290845 | SimulationData | 1 | Scenario I | Fixed | Entropy (type) |
| 34 | 34 | 1.290845 | SimulationData | 2 | Scenario I | Fixed | Entropy (type) |
| 34 | 34 | 1.290845 | SimulationData | 3 | Scenario I | Fixed | Entropy (type) |
| 34 | 34 | 1.290845 | SimulationData | 4 | Scenario I | Fixed | Entropy (type) |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 13 | 20 | 0.000000 | SimulationData | 5 | Scenario IV | BA Randomization | ProbabilityIntervalMax |
| 13 | 20 | 0.000000 | SimulationData | 6 | Scenario IV | BA Randomization | ProbabilityIntervalMax |
| 13 | 20 | 0.000000 | SimulationData | 7 | Scenario IV | BA Randomization | ProbabilityIntervalMax |
| 13 | 20 | 0.000000 | SimulationData | 8 | Scenario IV | BA Randomization | ProbabilityIntervalMax |
| 13 | 20 | 0.000000 | SimulationData | 9 | Scenario IV | BA Randomization | ProbabilityIntervalMax |
720 rows × 7 columns
In [10]:
filtered_score_groupby = filtered_df.groupby(["model", "policy", "metric"])
means_table = filtered_score_groupby["score"].mean().unstack().unstack()
quantile_table = filtered_score_groupby["score"].quantile(0.75).unstack().unstack()
randomizations = ["AB Randomization", "BA Randomization"]
In [11]:
groupby_columns = ["model", "policy"]
pivoted_df = filtered_df.pivot(
index=["model", "policy", "simulation", "patient_id"],
columns="metric",
values="score",
)
table = pivoted_df.groupby(groupby_columns).mean()
table["Regret 0.75 quantile"] = pivoted_df.groupby(groupby_columns).quantile(0.75)[
"RegretAgainstOtherConfiguration(FixedPolicy)"
]
table = table.loc[(slice(None), randomizations), :]
In [12]:
table
Out[12]:
| metric | Entropy (type) | ProbabilityIntervalMax | ProbabilityIntervalMin | RegretAgainstOtherConfiguration(FixedPolicy) | std(duration) | std(intensity) | Regret 0.75 quantile | |
|---|---|---|---|---|---|---|---|---|
| model | policy | |||||||
| Scenario I | AB Randomization | 1.140387 | 0.0 | 0.0 | 0.000000 | 0.249303 | 0.298788 | 0.000000 |
| BA Randomization | 1.066846 | 0.0 | 0.0 | 0.000000 | 0.268321 | 0.311346 | 0.000000 | |
| Scenario II | AB Randomization | 0.682043 | 0.0 | 0.0 | -14.253217 | 0.203038 | 0.220925 | -7.824710 |
| BA Randomization | 0.678126 | 0.0 | 0.0 | -12.621704 | 0.206248 | 0.225740 | -4.707913 | |
| Scenario III | AB Randomization | 0.993269 | 0.0 | 0.0 | -5.562863 | 0.243266 | 0.275533 | -1.990651 |
| BA Randomization | 0.855366 | 0.0 | 0.0 | -6.861749 | 0.253340 | 0.280710 | -3.159108 | |
| Scenario IV | AB Randomization | 0.999499 | 0.0 | 0.0 | -7.686238 | 0.238633 | 0.278250 | -3.537417 |
| BA Randomization | 0.949294 | 0.0 | 0.0 | -6.189339 | 0.249756 | 0.277468 | -1.508569 |
In [13]:
print(
table[
[
"RegretAgainstOtherConfiguration(FixedPolicy)",
"Regret 0.75 quantile",
"Entropy (type)",
"std(duration)",
"std(intensity)",
"ProbabilityIntervalMax",
"ProbabilityIntervalMin",
]
]
.style.format(precision=2)
.to_latex()
)
\begin{tabular}{llrrrrrrr}
& metric & RegretAgainstOtherConfiguration(FixedPolicy) & Regret 0.75 quantile & Entropy (type) & std(duration) & std(intensity) & ProbabilityIntervalMax & ProbabilityIntervalMin \\
model & policy & & & & & & & \\
\multirow[c]{2}{*}{Scenario I} & AB Randomization & 0.00 & 0.00 & 1.14 & 0.25 & 0.30 & 0.00 & 0.00 \\
& BA Randomization & 0.00 & 0.00 & 1.07 & 0.27 & 0.31 & 0.00 & 0.00 \\
\multirow[c]{2}{*}{Scenario II} & AB Randomization & -14.25 & -7.82 & 0.68 & 0.20 & 0.22 & 0.00 & 0.00 \\
& BA Randomization & -12.62 & -4.71 & 0.68 & 0.21 & 0.23 & 0.00 & 0.00 \\
\multirow[c]{2}{*}{Scenario III} & AB Randomization & -5.56 & -1.99 & 0.99 & 0.24 & 0.28 & 0.00 & 0.00 \\
& BA Randomization & -6.86 & -3.16 & 0.86 & 0.25 & 0.28 & 0.00 & 0.00 \\
\multirow[c]{2}{*}{Scenario IV} & AB Randomization & -7.69 & -3.54 & 1.00 & 0.24 & 0.28 & 0.00 & 0.00 \\
& BA Randomization & -6.19 & -1.51 & 0.95 & 0.25 & 0.28 & 0.00 & 0.00 \\
\end{tabular}
Figures¶
In [ ]:
# AB Randomization Plot
series_to_plot = [
entry["result"]
for entry in calculated_series
if str(entry["result"].simulations[0].policy)
== "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n', 'SelectionPolicyThompsonSampling(PhysicalExerciseModel)'])"
]
metrics = [
RegretAgainstOtherConfiguration(
outcome_name="pain_reduction",
name="FixedPolicy",
configuration_transform_function=switch_to_fixed_policy,
config_to_simulation_data=config_to_simulation_data,
),
]
SeriesOfSimulationsData.plot_lines(
series_to_plot, metrics, hue="model", legend_position=(0, 0.3)
)
plt.xlim(21, 34)
plt.ylim([-14, 1])
plt.savefig("regret_over_time_AB.pdf", bbox_inches="tight")
In [ ]:
# BA Randomization Plot
series_to_plot = [
entry["result"]
for entry in calculated_series
if str(entry["result"].simulations[0].policy)
== "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyThompsonSampling(PhysicalExerciseModel)', 'SelectionPolicyFixedPolicy\\n'])"
]
metrics = [
RegretAgainstOtherConfiguration(
outcome_name="pain_reduction",
name="FixedPolicy",
configuration_transform_function=switch_to_fixed_policy,
config_to_simulation_data=config_to_simulation_data,
),
]
SeriesOfSimulationsData.plot_lines(
series_to_plot, metrics, hue="model", legend_position=(0, 0.3)
)
plt.xlim(7, 20)
plt.ylim([-14, 1])
plt.savefig("regret_over_time_BA.pdf", bbox_inches="tight")
Calculate Entropy and Standard Deviation for the fixed treatment sequence¶
In [ ]:
df.loc[(df["policy"] == "Fixed") & (df["t"] == 13)].groupby("metric").mean(
numeric_only=True
)
In [ ]: